# Libraries
library(tidyverse)
library(lubridate)
library(waffle)
library(kableExtra)
library(ggthemes)
library(ggalluvial)
# Parameters
input_path <- here::here("data", "mspa_na_data.rds")
metadata_path <- here::here("data-raw", "school_metadata.csv")
figure_out_path <- file.path("~", "Desktop", "jgme_final", "Figures")
total_md <- 153
total_do <- 36
likert_colors <- c("darkgreen","green","orange","red","darkred")
likert_labels <-
c(
"Strongly disagree",
"Somewhat disagree",
"Neither agree nor disagree",
"Somewhat agree",
"Strongly agree"
)
interest_labels <-
c(
"Not at all interested",
"Less interested",
"Undecided",
"Somewhat interested",
"Very interested"
)
my_theme <-
theme(
axis.text.y = element_text(size = 9),
axis.text.x = element_text(size = 9, angle = 45, hjust = 1),
axis.title.y = element_text(size = 12, face = "bold"),
axis.title.x = element_text(size = 12, face = "bold")
)
mspa_hex <- "#666699"
#===============================================================================
# read in data
na_data <-
input_path %>%
read_rds()
metadata <-
metadata_path %>%
read_csv() %>%
drop_na()
I’ve already done some data cleaning in previous scripts, but there are still a few tweaks that we probably want to make so that everything is human-readable and so that only useful information is carried forward.
First, we want to remove people who did not consent to the survey, since we can’t use their information anyway. We will also remove students who are not in schools within the U.S. or who did not provide the name of the school that they attend.
na_data <-
na_data %>%
filter(consent == "yes", school_attend != "International") %>%
drop_na(school_attend)
And then we can filter out some variables that we don’t need.
na_data <-
na_data %>%
select(-survey_id, -consent, -complete)
And create a few variables that we’ll need later:
na_data <-
na_data %>%
mutate(
sex =
case_when(
sab_is_male == "yes" ~ "male",
sab_is_female == "yes" ~ "female",
TRUE ~ NA_character_
),
is_urm = if_else(race_white == "yes", "no", "yes"),
is_gm =
case_when(
gender_agender == "yes" ~ "yes",
gender_genderqueer == "yes" ~ "yes",
gender_transman == "yes" ~ "yes",
gender_transwoman == "yes" ~ "yes",
gender_another == "yes" ~ "yes",
TRUE ~ "no"
)
) %>%
left_join(metadata, by = c("school_attend" = "name")) %>%
mutate(degree_type = factor(Degree, levels = c("MD", "DO"))) %>%
drop_na(degree_type)
na_data
## # A tibble: 1,162 x 116
## participant_id timestamp school_attend med_school_year med_school_year…
## <dbl> <chr> <chr> <chr> <chr>
## 1 1 2018-08-… Stanford Uni… Pre-Clinical S… <NA>
## 2 2 2018-08-… Stanford Uni… Research (PhD,… <NA>
## 3 3 2018-08-… Stanford Uni… Research (PhD,… <NA>
## 4 4 2018-08-… Johns Hopkin… Pre-Clinical S… <NA>
## 5 6 2018-08-… Stanford Uni… Pre-Clinical S… <NA>
## 6 8 2018-08-… Stanford Uni… Research (PhD,… <NA>
## 7 9 2018-08-… Stanford Uni… Clinical Stude… <NA>
## 8 10 2018-08-… Stanford Uni… Pre-Clinical S… <NA>
## 9 11 2018-08-… Stanford Uni… Pre-Clinical S… <NA>
## 10 12 2018-08-… Stanford Uni… Pre-Clinical S… <NA>
## # … with 1,152 more rows, and 111 more variables: is_lgbtq <chr>,
## # sab_is_male <chr>, sab_is_female <chr>, gender_man <chr>,
## # gender_woman <chr>, gender_agender <chr>, gender_genderqueer <chr>,
## # gender_transman <chr>, gender_transwoman <chr>, gender_another <chr>,
## # gender_another_description <chr>, so_asexual <chr>, so_bisexual <chr>,
## # so_gay <chr>, so_lesbian <chr>, so_pansexual <chr>, so_queer <chr>,
## # so_questioning <chr>, so_heterosexual <chr>, so_another <chr>,
## # so_another_description <chr>, race_native <chr>, race_asian <chr>,
## # race_black <chr>, race_pi <chr>, race_white <chr>, race_hispanic <chr>,
## # race_another <chr>, race_another_explanation <chr>,
## # interaction_agree <dbl>, interaction_satisfaction <dbl>,
## # personal_benefit_mspa <dbl>, community_benefit_mspa <dbl>,
## # enhanced_activity_mspa_lgbtq_meded <chr>,
## # enhanced_activity_mspa_social <chr>,
## # enhanced_activity_mspa_di_training <chr>,
## # enhanced_activity_mspa_discrim_bias_reduction <chr>,
## # enhanced_activity_mspa_mentorship <chr>,
## # enhanced_activity_mspa_advocacy <chr>,
## # enhanced_activity_mspa_global_health <chr>,
## # enhanced_activity_mspa_other <chr>,
## # enhanced_activity_mspa_other_explanation <chr>,
## # school_affinity_group_exist <chr>, school_affinity_group_benefit <dbl>,
## # school_affinity_group_involved <chr>, why_not_involved_time <chr>,
## # why_not_involved_value <chr>, why_not_involved_opportunities <chr>,
## # why_not_involved_uninterested <chr>, why_not_involved_not_queer <chr>,
## # why_not_involved_another <chr>, why_not_involved_another_explanation <chr>,
## # school_activities_advocacy <chr>, school_activities_social <chr>,
## # school_activities_mentorship <chr>, school_activities_educational <chr>,
## # school_activities_research <chr>, school_activities_intercollegiate <chr>,
## # school_activities_other <chr>, school_activities_other_explanation <chr>,
## # school_affinity_group_mission <dbl>, school_affinity_group_supported <dbl>,
## # school_affinity_group_identify <dbl>, interest_lgbtq_meded <dbl>,
## # interest_lgbtq_social <dbl>, interest_lgbtq_bias_training <dbl>,
## # interest_lgbtq_advocacy <dbl>, interest_lgbtq_global_health <dbl>,
## # interest_lgbtq_other <chr>, importance_lgbtq_meded <chr>,
## # importance_lgbtq_social <chr>, importance_lgbtq_bias_training <chr>,
## # importance_lgbtq_mentorship <chr>, importance_lgbtq_advocacy <chr>,
## # importance_lgbtq_global_health <chr>, satisfaction_lgbtq_meded <dbl>,
## # satisfaction_lgbtq_social <dbl>, satisfaction_bias_training <dbl>,
## # satisfaction_lgbtq_mentorship <dbl>, satisfaction_lgbtq_advocacy <dbl>,
## # satisfaction_lgbtq_global_health <dbl>, out_classmates_peers <chr>,
## # out_labmates_coworkers_team <chr>, out_mentors <chr>,
## # out_medical_school_app <chr>, out_residency_app <chr>, out_other <chr>,
## # out_other_explanation <chr>, ability_out_classmates_peers <chr>,
## # ability_out_labmates_coworkers_team <chr>, ability_out_mentors <chr>,
## # ability_out_medical_school_app <chr>, ability_out_residency_app <chr>,
## # ability_out_other <chr>, ability_out_other_explanation <chr>,
## # protections_out_classmates_peers <chr>,
## # protections_out_labmates_coworkers_team <chr>,
## # protections_out_mentors <chr>, protections_out_medical_school_app <chr>,
## # protections_out_residency_app <chr>, …
The first section of the survey asked questions about respondents’ demographic identifiers, including the following:
school_attend)med_school_year)is_lgbtq)gender_)so_)race_)degree_type)Below, we calculate the breakdown of our survey respondents by each of these demographics.
Schools are arranged in order of decreasing number of responses.
na_data %>%
count(school_attend, name = "Number of responses") %>%
mutate(
`Percentage of total responses` =
((`Number of responses` / sum(`Number of responses`)) * 100) %>%
round(digits = 1)
) %>%
rename(School = school_attend) %>%
arrange(desc(`Number of responses`)) %>%
knitr::kable()
| School | Number of responses | Percentage of total responses |
|---|---|---|
| Temple University School of Medicine | 84 | 7.2 |
| University of Louisville School of Medicine | 62 | 5.3 |
| University of Oklahoma College of Medicine | 57 | 4.9 |
| Geisinger Commonwealth School of Medicine | 44 | 3.8 |
| Stanford University School of Medicine | 40 | 3.4 |
| University of Pittsburgh School of Medicine | 39 | 3.4 |
| University of New Mexico School of Medicine | 37 | 3.2 |
| Saint Louis University School of Medicine | 34 | 2.9 |
| University of Alabama School of Medicine | 34 | 2.9 |
| University of Arkansas for Medical Sciences/UAMS College of Medicine | 34 | 2.9 |
| Johns Hopkins University School of Medicine | 32 | 2.8 |
| University of Michigan Medical School | 32 | 2.8 |
| Western Michigan University Homer Stryker M.D. School of Medicine | 30 | 2.6 |
| Dell Medical School at The University of Texas at Austin | 28 | 2.4 |
| Chicago Medical School of Rosalind Franklin University of Medicine and Science | 26 | 2.2 |
| Touro University California College of Osteopathic Medicine | 23 | 2.0 |
| University of Wisconsin School of Medicine and Public Health | 23 | 2.0 |
| Georgetown University School of Medicine | 22 | 1.9 |
| Tulane University School of Medicine | 22 | 1.9 |
| Jacobs School of Medicine and Biomedical Sciences, University at Buffalo | 21 | 1.8 |
| Rutgers New Jersey Medical School | 18 | 1.5 |
| University of Texas Southwestern Medical School at Dallas | 17 | 1.5 |
| Harvard Medical School | 15 | 1.3 |
| Columbia University Roy and Diana Vagelos College of Physicians and Surgeons | 14 | 1.2 |
| Washington University School of Medicine | 14 | 1.2 |
| Weill Cornell Medical College | 14 | 1.2 |
| Albert Einstein College of Medicine | 13 | 1.1 |
| Alpert Medical School at Brown University | 13 | 1.1 |
| University of California, Irvine School of Medicine | 13 | 1.1 |
| University of Texas School of Medicine at San Antonio | 11 | 0.9 |
| University of Vermont College of Medicine | 11 | 0.9 |
| Cooper Medical School of Rowan University | 10 | 0.9 |
| University of Iowa Roy J. and Lucille A. Carver College of Medicine | 9 | 0.8 |
| University of Kansas School of Medicine | 9 | 0.8 |
| Vanderbilt University School of Medicine | 9 | 0.8 |
| West Virginia University School of Medicine | 9 | 0.8 |
| Edward Via College of Osteopathic Medicine | 8 | 0.7 |
| Keck School of Medicine of University of Southern California | 8 | 0.7 |
| University of North Texas Health Science Center Texas College of Osteopathic Medicine | 8 | 0.7 |
| Baylor College of Medicine | 7 | 0.6 |
| New York University School of Medicine | 7 | 0.6 |
| Philadelphia College of Osteopathic Medicine - Georgia Campus | 7 | 0.6 |
| State University of New York Downstate Medical Center College of Medicine | 7 | 0.6 |
| University of South Alabama College of Medicine | 7 | 0.6 |
| Michigan State University College of Human Medicine | 6 | 0.5 |
| University of California, San Fransisco School of Medicine | 6 | 0.5 |
| Yale School of Medicine | 6 | 0.5 |
| David Geffen School of Medicine at UCLA | 5 | 0.4 |
| Northwestern University Feinberg School of Medicine | 5 | 0.4 |
| University of Arizona College of Medicine - Phoenix | 5 | 0.4 |
| University of California, Davis School of Medicine | 5 | 0.4 |
| University of Florida College of Medicine | 5 | 0.4 |
| Lake Erie College of Osteopathic Medicine | 4 | 0.3 |
| Meharry Medical College School of Medicine | 4 | 0.3 |
| Pennsylvania State University College of Medicine | 4 | 0.3 |
| Perelman School of Medicine at the University of Pennsylvania | 4 | 0.3 |
| Philadelphia College of Osteopathic Medicine | 4 | 0.3 |
| Touro College of Osteopathic Medicine | 4 | 0.3 |
| University of Utah School of Medicine | 4 | 0.3 |
| University of Washington School of Medicine | 4 | 0.3 |
| Des Moines University College of Osteopathic Medicine | 3 | 0.3 |
| Indiana University School of Medicine | 3 | 0.3 |
| Medical College of Wisconsin | 3 | 0.3 |
| Oklahoma State University Center for Health Sciences College of Osteopathic Medicine | 3 | 0.3 |
| Sidney Kimmel Medical College at Thomas Jefferson University | 3 | 0.3 |
| Stony Brook University School of Medicine | 3 | 0.3 |
| Tufts University School of Medicine | 3 | 0.3 |
| University of Chicago Pritzker School of Medicine | 3 | 0.3 |
| University of Texas Medical School at Houston | 3 | 0.3 |
| A. T. Still University Kirksville College of Osteopathic Medicine | 2 | 0.2 |
| Dartmouth College Geisel School of Medicine | 2 | 0.2 |
| Edward Via College of Osteopathic Medicine- Carolinas Campus | 2 | 0.2 |
| Emory University School of Medicine | 2 | 0.2 |
| Hackensack Meridian School of Medicine | 2 | 0.2 |
| Icahn School of Medicine at Mount Sinai | 2 | 0.2 |
| Loma Linda University School of Medicine | 2 | 0.2 |
| Ohio University Heritage College of Osteopathic Medicine | 2 | 0.2 |
| Oregon Health & Science University School of Medicine | 2 | 0.2 |
| Rowan University School of Osteopathic Medicine | 2 | 0.2 |
| Rutgers Robert Wood Johnson Medical School | 2 | 0.2 |
| Sanford School of Medicine of the University of South Dakota | 2 | 0.2 |
| Texas Tech University Health Sciences Center School of Medicine | 2 | 0.2 |
| Touro University Nevada College of Osteopathic Medicine | 2 | 0.2 |
| University of Illinois at Urbana-Champaign Carle Illinois College of Medicine | 2 | 0.2 |
| University of Illinois College of Medicine | 2 | 0.2 |
| University of Massachusetts Medical School | 2 | 0.2 |
| University of Nebraska College of Medicine | 2 | 0.2 |
| University of Rochester School of Medicine and Dentistry | 2 | 0.2 |
| University of South Carolina School of Medicine | 2 | 0.2 |
| University of Toledo College of Medicine | 2 | 0.2 |
| Wayne State University School of Medicine | 2 | 0.2 |
| Boonshoft School of Medicine at Wright State University | 1 | 0.1 |
| Boston University School of Medicine | 1 | 0.1 |
| Burrell College of Osteopathic Medicine at New Mexico State University | 1 | 0.1 |
| Campbell University School of Osteopathic Medicine | 1 | 0.1 |
| Central Michigan University College of Medicine | 1 | 0.1 |
| Charles R. Drew University of Medicine and Science | 1 | 0.1 |
| Creighton University School of Medicine | 1 | 0.1 |
| Donald and Barbara Zucker School of Medicine at Hofstra/Northwell | 1 | 0.1 |
| Drexel University College of Medicine | 1 | 0.1 |
| Duke University School of Medicine | 1 | 0.1 |
| Eastern Virginia Medical School | 1 | 0.1 |
| Florida Atlantic University Charles E. Schmidt College of Medicine | 1 | 0.1 |
| Florida State University College of Medicine | 1 | 0.1 |
| Frank H. Netter M.D. School of Medicine at Quinnipiac University | 1 | 0.1 |
| Marian University College of Osteopathic Medicine | 1 | 0.1 |
| Mayo Clinic College of Medicine | 1 | 0.1 |
| Michigan State University College of Osteopathic Medicine | 1 | 0.1 |
| Nova Southeastern University College of Osteopathic Medicine | 1 | 0.1 |
| Oakland University William Beaumont School of Medicine | 1 | 0.1 |
| Rush Medical College | 1 | 0.1 |
| San Juan Bautista School of Medicine | 1 | 0.1 |
| Southern Illinois University School of Medicine | 1 | 0.1 |
| State University of New York Upstate Medical University | 1 | 0.1 |
| The Ohio State University College of Medicine | 1 | 0.1 |
| University of Colorado School of Medicine | 1 | 0.1 |
| University of Kentucky College of Medicine | 1 | 0.1 |
| University of Maryland School of Medicine | 1 | 0.1 |
| University of Minnesota Medical School | 1 | 0.1 |
| University of Nevada, Las Vegas School of Medicine | 1 | 0.1 |
| University of North Carolina School of Medicine | 1 | 0.1 |
| University of Texas Medical Branch School of Medicine | 1 | 0.1 |
| University of Virginia School of Medicine | 1 | 0.1 |
| Washington State University Elson S. Floyd College of Medicine | 1 | 0.1 |
| West Virginia School of Osteopathic Medicine | 1 | 0.1 |
And just to explicitly show the overall N after filtering:
na_data %>%
count(name = "n_overall")
## # A tibble: 1 x 1
## n_overall
## <int>
## 1 1162
demo_overall <-
na_data %>%
count(degree_type, name = "count") %>%
mutate(percentage = (count / sum(count) * 100) %>% round(1)) %>%
bind_rows(
na_data %>%
count(name = "count") %>%
mutate(
percentage = 100,
degree_type = "Full Sample"
)
) %>%
pivot_longer(
cols = c(count, percentage),
names_to = "variable",
values_to = "values"
) %>%
pivot_wider(
names_from = c(degree_type, variable),
values_from = values
) %>%
mutate(variable = "Overall")
demo_breakdown <-
na_data %>%
group_by(degree_type) %>%
select(
starts_with("so_"),
starts_with("gender_"),
starts_with("race_"),
-contains("description"),
-contains("explanation")
) %>%
summarize_all(
.funs = function(x) sum(x == "yes")
) %>%
pivot_longer(
cols = -degree_type,
names_to = "variable",
values_to = "count"
) %>%
group_by(
variable_type =
str_extract(variable, ".+_") %>%
str_sub(end = -2L)
) %>%
mutate(percentage = (count / sum(count) * 100) %>% round(1)) %>%
bind_rows(
na_data %>%
select(
starts_with("so_"),
starts_with("gender_"),
starts_with("race_"),
-contains("description"),
-contains("explanation")
) %>%
summarize_all(
.funs = function(x) sum(x == "yes")
) %>%
pivot_longer(
cols = everything(),
names_to = "variable",
values_to = "count"
) %>%
group_by(
variable_type =
str_extract(variable, ".+_") %>%
str_sub(end = -2L)
) %>%
mutate(
percentage = (count / sum(count) * 100) %>% round(1),
degree_type = "Full Sample"
)
)
demo_year <-
na_data %>%
group_by(degree_type) %>%
select(med_school_year) %>%
count(med_school_year, name = "count") %>%
drop_na() %>%
ungroup() %>%
mutate(percentage = (count / sum(count) * 100) %>% round(1)) %>%
bind_rows(
na_data %>%
count(med_school_year, name = "count") %>%
drop_na() %>%
mutate(
percentage = (count / sum(count) * 100) %>% round(1),
degree_type = "Full Sample"
)
) %>%
transmute(
degree_type,
variable = med_school_year,
variable_type = "med_school_year",
count,
percentage
)
demo_combined <-
bind_rows(demo_breakdown, demo_year) %>%
pivot_longer(
cols = c(count, percentage),
names_to = "my_variable",
values_to = "my_value"
) %>%
pivot_wider(
names_from = c(degree_type, my_variable),
values_from = my_value
) %>%
ungroup() %>%
mutate(
variable_type =
factor(
variable_type,
levels = c("gender", "so", "race", "med_school_year")
)
) %>%
arrange(variable_type, desc(MD_count)) %>%
select(-variable_type)
Here we actually make the table…
recode_values <-
c(
"gender_woman" = "Woman",
"gender_man" = "Man",
"gender_genderqueer" = "Genderqueer/Gender non-conforming",
"gender_transman" = "Transgender Man",
"gender_agender" = "Agender",
"gender_another" = "Another Gender Identity",
"gender_transwoman" = "Transgender Woman",
"so_heterosexual" = "Straight/Heterosexual",
"so_gay" = "Gay",
"so_bisexual" = "Bisexual",
"so_queer" = "Queer",
"so_pansexual" = "Pansexual",
"so_lesbian" = "Lesbian",
"so_questioning" = "Questioning",
"so_asexual" = "Asexual",
"so_another" = "Another Sexual Orientation",
"race_white" = "White/Caucasian",
"race_asian" = "Asian",
"race_hispanic" = "Latino or Hispanic",
"race_black" = "Black or African American",
"race_another" = "Another Race",
"race_native" = "American Indian or Alaska Native",
"race_pi" = "Native Hawaiian or Other Pacific Islander"
)
demo_overall %>%
bind_rows(demo_combined) %>%
transmute(
recode(variable, !!! recode_values),
`Full Sample` = str_glue("{`Full Sample_count`} ({`Full Sample_percentage`}%)"),
`Allopathic (MD)` = str_glue("{MD_count} ({MD_percentage}%)"),
`Osteopathic (DO)` = str_glue("{DO_count} ({DO_percentage}%)"),
) %>%
kable(col.names = c(" ", colnames(.)[-1])) %>%
kable_styling(
bootstrap_options = c("striped", "condensed"),
full_width = F
) %>%
row_spec(1, bold = T) %>%
pack_rows("Gender", 2, 8, label_row_css = "background-color: #666; color: #fff;") %>%
pack_rows("Sexual Orientation", 9, 17, label_row_css = "background-color: #666; color: #fff;") %>%
pack_rows("Race/Ethnicity", 18, 24, label_row_css = "background-color: #666; color: #fff;") %>%
pack_rows("Year in School", 25, 28, label_row_css = "background-color: #666; color: #fff;")
| Full Sample | Allopathic (MD) | Osteopathic (DO) | |
|---|---|---|---|
| Overall | 1162 (100%) | 1082 (93.1%) | 80 (6.9%) |
| Gender | |||
| Woman | 674 (55.6%) | 617 (50.9%) | 57 (4.7%) |
| Man | 433 (35.7%) | 411 (33.9%) | 22 (1.8%) |
| Genderqueer/Gender non-conforming | 72 (5.9%) | 67 (5.5%) | 5 (0.4%) |
| Transgender Man | 13 (1.1%) | 13 (1.1%) | 0 (0%) |
| Agender | 11 (0.9%) | 11 (0.9%) | 0 (0%) |
| Another Gender Identity | 7 (0.6%) | 6 (0.5%) | 1 (0.1%) |
| Transgender Woman | 3 (0.2%) | 3 (0.2%) | 0 (0%) |
| Sexual Orientation | |||
| Straight/Heterosexual | 521 (36%) | 498 (34.4%) | 23 (1.6%) |
| Gay | 239 (16.5%) | 221 (15.3%) | 18 (1.2%) |
| Bisexual | 224 (15.5%) | 208 (14.4%) | 16 (1.1%) |
| Queer | 215 (14.9%) | 200 (13.8%) | 15 (1%) |
| Pansexual | 82 (5.7%) | 72 (5%) | 10 (0.7%) |
| Lesbian | 82 (5.7%) | 70 (4.8%) | 12 (0.8%) |
| Questioning | 37 (2.6%) | 33 (2.3%) | 4 (0.3%) |
| Asexual | 34 (2.4%) | 31 (2.1%) | 3 (0.2%) |
| Another Sexual Orientation | 12 (0.8%) | 11 (0.8%) | 1 (0.1%) |
| Race/Ethnicity | |||
| White/Caucasian | 870 (67.3%) | 812 (62.8%) | 58 (4.5%) |
| Asian | 184 (14.2%) | 169 (13.1%) | 15 (1.2%) |
| Latino or Hispanic | 127 (9.8%) | 113 (8.7%) | 14 (1.1%) |
| Black or African American | 71 (5.5%) | 71 (5.5%) | 0 (0%) |
| Another Race | 18 (1.4%) | 18 (1.4%) | 0 (0%) |
| American Indian or Alaska Native | 13 (1%) | 12 (0.9%) | 1 (0.1%) |
| Native Hawaiian or Other Pacific Islander | 9 (0.7%) | 4 (0.3%) | 5 (0.4%) |
| Year in School | |||
| Pre-Clinical Student (prior to clerkships) | 711 (61.2%) | 663 (57.1%) | 48 (4.1%) |
| Clinical Student (on clerkships) | 374 (32.2%) | 344 (29.6%) | 30 (2.6%) |
| Research (PhD, Masters, or other) | 50 (4.3%) | 49 (4.2%) | 1 (0.1%) |
| Other | 26 (2.2%) | 25 (2.2%) | 1 (0.1%) |
Outness plots
context_recode <-
c(
"classmates_peers" = "To peers",
"labmates_coworkers_team" = "To lab-mates or team members",
"mentors" = "To mentors",
"medical_school_app" = "On application to medical school",
"residency_app" = "On application to residency/post-doctoral studies"
)
n <-
na_data %>%
drop_na(school_attend, is_lgbtq) %>%
nrow()
outness_data <-
na_data %>%
select(
starts_with("out_"),
starts_with("ability_out_"),
starts_with("protections_out"),
sex,
is_urm,
is_gm,
school_attend,
is_lgbtq
) %>%
select(
-contains("explanation"),
-contains("other")
) %>%
drop_na(school_attend, is_lgbtq) %>%
pivot_longer(
cols =
c(
starts_with("out_"),
starts_with("ability_out_"),
starts_with("protections_out")
),
names_to = c("variable", "context"),
values_to = "is_out",
names_sep = "ut_"
) %>%
mutate(
variable =
recode(
variable,
"o" = "Out (or plan to be out)",
"ability_o" = "Support ability to be out",
"protections_o" = "Support protections for outness"
),
context =
recode(
context,
!!! context_recode
)
)
outness_data %>%
group_by(is_lgbtq, variable, context) %>%
summarize(prop = sum(is_out == "yes") / n()) %>%
mutate(context = fct_reorder(context, desc(prop))) %>%
ggplot(aes(x = context, y = prop, fill = is_lgbtq)) +
geom_col(position = "dodge", color = "black") +
geom_text(
aes(label = (prop * 100) %>% round(0)),
vjust = -0.2,
size = 4,
position = position_dodge(width = 0.9),
fontface = "bold"
) +
facet_wrap(vars(variable), ncol = 3) +
scale_y_continuous(labels = scales::label_percent(accuracy = 1)) +
scale_fill_manual(
breaks = c("LGBTQ+", "Non-LGBTQ+"),
values = c(mspa_hex, "gray60")
) +
theme(
legend.position = "bottom",
axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
) +
labs(
x = NULL,
y = "% of Students",
fill = NULL,
caption = str_glue("N = {n}")
)
Alter na_data to include a variable that we will use later
na_data <-
na_data %>%
mutate(
gender_expansive =
if_else(
gender_genderqueer == "yes" |
gender_agender == "yes" |
gender_transman == "yes" |
gender_transwoman == "yes" |
gender_another == "yes",
"yes",
"no"
),
gender_nonexpansive =
if_else(
gender_expansive == "no",
"yes",
"no"
)
)
so_vars <-
colnames(na_data)[
setdiff(
starts_with(match = "so_", vars = colnames(na_data)),
c(
ends_with(match = "_description", vars = colnames(na_data)),
ends_with(match = "_explanation", vars = colnames(na_data))
)
)
]
demographic_vars <-
c("so_", "gender_", "race_") %>%
map(
~ setdiff(
starts_with(match = ., vars = colnames(na_data)),
ends_with(match = "_description", vars = colnames(na_data))
)
) %>%
unlist() %>%
colnames(na_data)[.]
tally_outness <- function(my_var) {
na_data %>%
filter(is_lgbtq == "LGBTQ+") %>%
filter(!! sym(my_var) == "yes") %>%
select(starts_with("out_")) %>%
select(-ends_with("_explanation"), -ends_with("_other")) %>%
summarize_all(
#list(
#count = function(x) sum(x == "yes"),
#percentage =
function(x) sum(x == "yes") / length(x) * 100
#)
) %>%
pivot_longer(
cols = everything(),
names_to = "environment",
values_to = "percent_out",
names_prefix = "out_"
)
}
outness_subgroup <-
tibble(identity = demographic_vars) %>%
mutate(
data = map(identity, .f = tally_outness)
) %>%
unnest() %>%
separate(col = identity, into = c("var_type", "identity"), sep = "_")
# calculate total number of people in each demographic subgroup
demographics_n <-
na_data %>%
filter(is_lgbtq == "LGBTQ+") %>%
summarize_at(
.vars = vars(starts_with("so_"), starts_with("gender_"), starts_with("race")),
~ sum(. == "yes")
) %>%
select(-contains("another")) %>%
pivot_longer(
cols = everything(),
names_to = "identity",
values_to = "n",
) %>%
separate(col = identity, into = c("var_type", "identity"))
demographics_n
## # A tibble: 22 x 3
## var_type identity n
## <chr> <chr> <int>
## 1 so asexual 26
## 2 so bisexual 213
## 3 so gay 237
## 4 so lesbian 81
## 5 so pansexual 78
## 6 so queer 214
## 7 so questioning 13
## 8 so heterosexual 7
## 9 gender man 272
## 10 gender woman 304
## # … with 12 more rows
so_labels <-
function(label) {
new_label <-
str_glue(
"{title} (n = {number})",
title = str_to_title(label),
number =
demographics_n %>%
filter(identity %in% label) %>%
pull(n)
)
new_label
}
overall_outness_data <-
outness_data %>%
filter(is_lgbtq == "LGBTQ+") %>%
group_by(variable, context) %>%
summarize(prop = sum(is_out == "yes") / n()) %>%
mutate(
environment = fct_reorder(context, desc(prop)),
percent_out = (prop * 100) %>% round(1),
identity = "overall"
)
overall_outness_data
## # A tibble: 15 x 6
## # Groups: variable [3]
## variable context prop environment percent_out identity
## <chr> <chr> <dbl> <fct> <dbl> <chr>
## 1 Out (or plan… On application … 0.563 On application to … 56.3 overall
## 2 Out (or plan… On application … 0.289 On application to … 28.9 overall
## 3 Out (or plan… To lab-mates or… 0.591 To lab-mates or te… 59.1 overall
## 4 Out (or plan… To mentors 0.474 To mentors 47.4 overall
## 5 Out (or plan… To peers 0.908 To peers 90.8 overall
## 6 Support abil… On application … 0.917 On application to … 91.7 overall
## 7 Support abil… On application … 0.914 On application to … 91.4 overall
## 8 Support abil… To lab-mates or… 0.949 To lab-mates or te… 94.9 overall
## 9 Support abil… To mentors 0.946 To mentors 94.6 overall
## 10 Support abil… To peers 0.973 To peers 97.3 overall
## 11 Support prot… On application … 0.943 On application to … 94.3 overall
## 12 Support prot… On application … 0.927 On application to … 92.7 overall
## 13 Support prot… To lab-mates or… 0.941 To lab-mates or te… 94.1 overall
## 14 Support prot… To mentors 0.959 To mentors 95.9 overall
## 15 Support prot… To peers 0.959 To peers 95.9 overall
# SO plot
so_plot_frame <-
outness_subgroup %>%
filter(
var_type == "so",
!(identity %in% c("heterosexual", "questioning", "asexual", "another"))
) %>%
mutate(
environment =
recode(environment, !!! context_recode) %>%
fct_reorder(percent_out, .fun = mean, .desc = TRUE),
identity = fct_reorder2(identity, environment, percent_out)
)
so_plot <-
ggplot() +
geom_line(
data = so_plot_frame,
mapping =
aes(
x = environment,
y = percent_out,
color = identity,
group = identity
),
size = 1
) +
geom_point(
data = so_plot_frame,
mapping =
aes(
x = environment,
y = percent_out,
fill = identity
),
shape = 21,
size = 3
) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 30)) +
scale_y_continuous(
limits = c(15, 105),
labels = scales::label_percent(accuracy = 1, scale = 1)
) +
scale_fill_tableau(labels = so_labels) +
scale_color_tableau(labels = so_labels) +
theme_bw() +
theme(
axis.text.x = element_text(size = 10, angle = 30, hjust = 1)
) +
labs(
subtitle = "Outness by sexual orientation",
x = NULL,
y = "Percentage of \"out\" students",
fill = NULL,
color = NULL
)
so_plot %>%
ggsave(
filename = "so_plot.pdf",
plot = .,
device = "pdf",
path = figure_out_path,
width = 6.5,
height = 5,
units = "in"
)
so_plot
Note that in this plot we only show identities that are >5% of the total number of responses.
so_plot_gray <-
ggplot() +
geom_line(
data = so_plot_frame,
mapping =
aes(
x = environment,
y = percent_out,
group = identity,
linetype = identity
),
color = "gray30",
size = 1
) +
geom_point(
data = so_plot_frame,
mapping =
aes(
x = environment,
y = percent_out,
fill = identity,
shape = identity
),
fill = "black",
color = "gray60",
size = 4
) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 30)) +
scale_y_continuous(
limits = c(15, 105),
labels = scales::label_percent(accuracy = 1, scale = 1)
) +
scale_shape_manual(
values = 21:25,
labels = so_labels
) +
scale_linetype_discrete(labels = so_labels, guide = FALSE) +
theme_bw() +
theme(
axis.text.x = element_text(size = 10, angle = 30, hjust = 1)
) +
labs(
subtitle = "Outness by sexual orientation",
x = NULL,
y = "Percentage of \"out\" students",
shape = NULL,
linetype = NULL
)
so_plot_gray
so_plot_gray %>%
ggsave(
filename = "so_plot_gray.pdf",
plot = .,
device = "pdf",
path = figure_out_path,
width = 6.5,
height = 5,
units = "in"
)
By gender identity…
gender_plot_frame <-
outness_subgroup %>%
filter(identity %in% c("expansive", "nonexpansive")) %>%
mutate(
environment =
recode(environment, !!! context_recode) %>%
factor(levels = levels(so_plot_frame$environment)),
identity =
identity %>%
recode(expansive = "Gender-expansive/Gender-diverse", nonexpansive = "Cisgender") %>%
str_wrap(width = 17) %>%
fct_reorder2(environment, percent_out)
)
gender_plot <-
gender_plot_frame %>%
ggplot(aes(x = environment, y = percent_out, fill = identity)) +
geom_line(aes(color = identity, group = identity), alpha = 0.8, size = 1) +
geom_point(shape = 21, size = 3, alpha = 1) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 30)) +
scale_y_continuous(
limits = c(15, 105),
labels = scales::label_percent(accuracy = 1, scale = 1),
) +
scale_fill_tableau(
labels =
function(x) {
str_glue(
"{x} (n = {num})",
num =
demographics_n %>%
mutate(
identity =
identity %>%
recode(
expansive = "Gender-expansive/Gender-diverse",
nonexpansive = "Cisgender"
) %>%
str_wrap(width = 17)
) %>%
filter(identity %in% x) %>%
pull(n)
)
}
) +
scale_color_tableau(
labels =
function(x) {
str_glue(
"{x} (n = {num})",
num =
demographics_n %>%
mutate(
identity =
identity %>%
recode(
expansive = "Gender-expansive/Gender-diverse",
nonexpansive = "Cisgender"
) %>%
str_wrap(width = 17)
) %>%
filter(identity %in% x) %>%
pull(n)
)
}
) +
theme_bw() +
theme(
axis.text.x = element_text(size = 10, angle = 30, hjust = 1)
) +
labs(
subtitle = "Outness by gender identity",
x = NULL,
y = "Percentage of \"out\" students",
fill = NULL,
color = NULL
)
gender_plot
gender_plot %>%
ggsave(
filename = "gender_plot.pdf",
plot = .,
device = "pdf",
path = figure_out_path,
width = 6.5,
height = 5,
units = "in"
)
gender_labels <-
function(x) {
str_glue(
"{x} (n = {num})",
num =
demographics_n %>%
mutate(
identity =
identity %>%
recode(
expansive = "Gender-expansive/Gender-diverse",
nonexpansive = "Cisgender"
) %>%
str_wrap(width = 17)
) %>%
filter(identity %in% x) %>%
pull(n)
)
}
gender_plot_gray <-
gender_plot_frame %>%
ggplot(aes(x = environment, y = percent_out)) +
geom_line(aes(color = identity, group = identity), alpha = 0.8, size = 1) +
geom_point(
aes(fill = identity),
color = "black",
shape = 21,
size = 3,
alpha = 1
) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 30)) +
scale_y_continuous(
limits = c(15, 105),
labels = scales::label_percent(accuracy = 1, scale = 1)
) +
scale_shape_manual(values = 21:22, labels = gender_labels) +
scale_color_manual(values = c("black", "gray"), labels = gender_labels) +
scale_fill_manual(values = c("black", "gray"), labels = gender_labels) +
theme_bw() +
theme(
axis.text.x = element_text(size = 10, angle = 30, hjust = 1)
) +
labs(
subtitle = "Outness by gender identity",
x = NULL,
y = "Percentage of \"out\" students",
fill = NULL,
color = NULL
)
gender_plot_gray
gender_plot_gray %>%
ggsave(
filename = "gender_plot_gray.pdf",
plot = .,
device = "pdf",
path = figure_out_path,
width = 6.5,
height = 5,
units = "in"
)
Support for outness plots:
context_order <-
outness_data %>%
filter(variable == "Out (or plan to be out)") %>%
group_by(context) %>%
summarize(prop = sum(is_out == "yes") / n()) %>%
arrange(desc(prop)) %>%
pull(context)
support_plot <-
outness_data %>%
filter(!(variable == "Out (or plan to be out)" & is_lgbtq != "LGBTQ+")) %>%
group_by(variable, context) %>%
summarize(prop = sum(is_out == "yes") / n()) %>%
mutate(
environment = factor(context, levels = context_order),
percent_out = (prop * 100) %>% round(1),
identity = "overall"
) %>%
ggplot(aes(x = environment, y = percent_out)) +
geom_col(
aes(fill = variable),
position = "dodge",
color = "black",
size = 0.6
) +
geom_text(
aes(
group = variable,
label = str_glue("{num}%", num = percent_out %>% round(0))
),
size = 4,
position = position_dodge(width = 0.91),
fontface = "bold",
vjust = -0.3
) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 30)) +
scale_y_continuous(
labels = scales::label_percent(accuracy = 1, scale = 1),
limits = c(NA, 101)
) +
scale_fill_tableau() +
theme_bw() +
theme(
legend.position = "bottom",
axis.text.x = element_text(size = 10, angle = 30, hjust = 1),
) +
labs(
x = NULL,
y = "% of students",
fill = NULL,
caption = str_glue("N = {nrow(na_data)}")
)
support_plot
support_plot %>%
ggsave(
filename = "support_plot.pdf",
plot = .,
device = "pdf",
path = figure_out_path,
width = 7.25,
height = 5,
units = "in"
)
support_plot_gray <-
support_plot +
scale_fill_tableau(palette = "Classic Gray 5")
support_plot_gray
support_plot_gray %>%
ggsave(
filename = "support_plot_gray.pdf",
plot = .,
device = "pdf",
path = figure_out_path,
width = 7.25,
height = 5,
units = "in"
)
Alluvium Plot
alluvium_plot <-
na_data %>%
filter(is_lgbtq == "LGBTQ+") %>%
count(
out_classmates_peers,
out_medical_school_app,
out_residency_app,
out_mentors,
out_labmates_coworkers_team
) %>%
ggplot(
aes(
y = n,
axis1 = out_medical_school_app,
axis2 = out_classmates_peers,
axis3 = out_labmates_coworkers_team,
axis4 = out_mentors,
axis5 = out_residency_app
)
) +
geom_alluvium(aes(fill = out_medical_school_app), alpha = 0.65) +
geom_stratum(width = 1/3, fill = "white", color = "black") +
geom_text(stat = "stratum", infer.label = TRUE, size = 3) +
scale_x_discrete(
limits =
c(
"out_medical_school_app",
"out_classmates_peers",
"out_labmates_coworkers_team",
"out_mentors",
"out_residency_app"
),
labels =
c(
"Out on medical school application?",
"Out to peers?",
"Out to lab-mates/team-members?",
"Out to mentors?",
"Out on residency application?"
) %>%
str_wrap(width = 20),
expand = c(0.1, 0.1)
) +
scale_y_continuous(
labels = scales::label_number()
) +
scale_fill_tableau() +
theme_minimal() +
theme(legend.position = "none") +
labs(
subtitle = "Over time, medical students who were \"out\" on their applications to\nmedical school become less professionally \"out\"",
x = NULL,
y = "Number of students",
fill = NULL,
caption = str_glue("N = {(na_data$is_lgbtq == \"LGBTQ+\") %>% sum(na.rm = TRUE)}")
)
alluvium_plot
alluvium_plot %>%
ggsave(
filename = "outness_alluvium.pdf",
plot = .,
device = "pdf",
path = file.path(figure_out_path),
width = 6,
height = 4.5,
units = "in"
)
contingency_table <-
table(
outness_data %>%
filter(is_lgbtq == "LGBTQ+", variable == "Out (or plan to be out)") %>%
pull(context),
outness_data %>%
filter(is_lgbtq == "LGBTQ+", variable == "Out (or plan to be out)") %>%
pull(is_out)
)
# chi-squared test for independence
contingency_table %>%
chisq.test()
##
## Pearson's Chi-squared test
##
## data: .
## X-squared = 518.29, df = 4, p-value < 2.2e-16
p_values <-
matrix(nrow = nrow(contingency_table), ncol = nrow(contingency_table))
chi_squared_values <-
matrix(nrow = nrow(contingency_table), ncol = nrow(contingency_table))
row.names(p_values) <- row.names(contingency_table)
colnames(p_values) <- row.names(contingency_table)
row.names(chi_squared_values) <- row.names(contingency_table)
colnames(chi_squared_values) <- row.names(contingency_table)
for (i in 1:5) {
for (j in 1:5) {
contingency_table[c(i,j),] %>%
chisq.test() %>%
`$`(p.value) ->
p_values[i,j]
contingency_table[c(i,j),] %>%
chisq.test() %>%
`$`(statistic) ->
chi_squared_values[i,j]
}
}
#bonferroni correction
p_values <- p_values * 10
p_values %>%
knitr::kable() %>%
kable_styling()
| On application to medical school | On application to residency/post-doctoral studies | To lab-mates or team members | To mentors | To peers | |
|---|---|---|---|---|---|
| On application to medical school | 10.0000000 | 0 | 3.3194253 | 0.0191287 | 0 |
| On application to residency/post-doctoral studies | 0.0000000 | 10 | 0.0000000 | 0.0000000 | 0 |
| To lab-mates or team members | 3.3194253 | 0 | 10.0000000 | 0.0003705 | 0 |
| To mentors | 0.0191287 | 0 | 0.0003705 | 10.0000000 | 0 |
| To peers | 0.0000000 | 0 | 0.0000000 | 0.0000000 | 10 |
chi_squared_values %>%
knitr::kable() %>%
kable_styling()
| On application to medical school | On application to residency/post-doctoral studies | To lab-mates or team members | To mentors | To peers | |
|---|---|---|---|---|---|
| On application to medical school | 0.0000000 | 95.0541 | 0.9413047 | 9.63132 | 190.5470 |
| On application to residency/post-doctoral studies | 95.0541045 | 0.0000 | 115.2183962 | 44.55080 | 498.0326 |
| To lab-mates or team members | 0.9413047 | 115.2184 | 0.0000000 | 17.01666 | 166.0308 |
| To mentors | 9.6313197 | 44.5508 | 17.0166565 | 0.00000 | 275.3272 |
| To peers | 190.5470270 | 498.0326 | 166.0308438 | 275.32717 | 0.0000 |
#to peers
table(
outness_data %>%
filter(context == "To peers") %>%
pull(variable),
outness_data %>%
filter(context == "To peers") %>%
pull(is_out)
) %>%
chisq.test()
##
## Pearson's Chi-squared test
##
## data: .
## X-squared = 1021.2, df = 2, p-value < 2.2e-16
#to labmates
table(
outness_data %>%
filter(context == "To lab-mates or team members") %>%
pull(variable),
outness_data %>%
filter(context == "To lab-mates or team members") %>%
pull(is_out)
) %>%
chisq.test()
##
## Pearson's Chi-squared test
##
## data: .
## X-squared = 1468.6, df = 2, p-value < 2.2e-16
# MD application
table(
outness_data %>%
filter(context == "On application to medical school") %>%
pull(variable),
outness_data %>%
filter(context == "On application to medical school") %>%
pull(is_out)
) %>%
chisq.test()
##
## Pearson's Chi-squared test
##
## data: .
## X-squared = 1223.6, df = 2, p-value < 2.2e-16
#to mentors
table(
outness_data %>%
filter(context == "To mentors") %>%
pull(variable),
outness_data %>%
filter(context == "To mentors") %>%
pull(is_out)
) %>%
chisq.test()
##
## Pearson's Chi-squared test
##
## data: .
## X-squared = 1664.9, df = 2, p-value < 2.2e-16
#to GME
table(
outness_data %>%
filter(context == "On application to residency/post-doctoral studies") %>%
pull(variable),
outness_data %>%
filter(context == "On application to residency/post-doctoral studies") %>%
pull(is_out)
) %>%
chisq.test()
##
## Pearson's Chi-squared test
##
## data: .
## X-squared = 1745.3, df = 2, p-value < 2.2e-16